---
title: "Untitled"
author: "melissa_barales"
output: pdf_document
---

```{r}
# Directory holding the input files that later chunks read by relative path.
census_dir <- "~/Desktop/Senior Thesis/Census"
# When knitting, the working directory is restored at the end of every chunk,
# so a bare setwd() in this chunk would not carry over to the chunks below.
# root.dir makes all subsequent chunks evaluate inside census_dir instead.
knitr::opts_knit$set(root.dir = census_dir)
# When the code is run directly in an R session (not knitted), the knitr
# option alone does not move the session, so keep the original setwd() there.
if (interactive()) {
  setwd(census_dir)
}
```

```{r}
library(haven)
library(dplyr)
library(tidyverse)
```

```{r}
# Load the ACS extract that carries the PUMA variables.
# NOTE(review): the export step further down writes to a file with this same
# name in the Census folder -- confirm that re-reading it on a rerun is intended.
dat_one <- readRDS(file = "Merged_ACS.rds")
```

```{r}
# Load the ACS extract that carries the demographic variables.
dat_two <- readRDS(file = "ACS_demographic.rds")
```

```{r}
# Load the Stata extract that contains race and family income.
dat_three <- haven::read_dta(file = "usa_00012.dta")
```

```{r}
# Build a person-level key, unique_id, by pasting sample, serial and pernum
# together with "-"; the three source columns are kept (remove = FALSE).
dat_three <- tidyr::unite(
  dat_three,
  col = "unique_id",
  sample, serial, pernum,
  sep = "-",
  remove = FALSE
)
```

```{r}
# Keep only the join key and the four variables taken from this extract.
dat_three <- dplyr::select(
  dat_three,
  unique_id, race, ftotinc, citizen, incwelfr
)
```

```{r}
# Attach the demographic extract to the PUMA extract; every row of dat_one is
# kept and matched on unique_id.
Pooled_dat <- dplyr::left_join(x = dat_one, y = dat_two, by = "unique_id")
```

```{r}
# Attach the columns kept from dat_three to the pooled data, matched on
# unique_id.
Pooled_dat_two <- dplyr::left_join(
  x = Pooled_dat,
  y = dat_three,
  by = "unique_id"
)
```

```{r}
#Loading in Hispanic dataset 
```

```{r}
# Read the Stata file holding the Hispanic-origin variable.
His_origin <- haven::read_dta(file = "Hispanic.dta")
```

```{r}
# Build the same "-"-separated unique_id key (sample, serial, pernum) used for
# the other extracts; the source columns are kept (remove = FALSE).
His_origin <- tidyr::unite(
  His_origin,
  col = "unique_id",
  sample, serial, pernum,
  sep = "-",
  remove = FALSE
)
```

```{r}
# Reduce the Hispanic dataset to the join key and the hispand variable.
His_origin <- dplyr::select(His_origin, unique_id, hispand)
```

```{r}
# Attach hispand to the pooled data, matched on unique_id.
Pooled_dat_three <- dplyr::left_join(
  x = Pooled_dat_two,
  y = His_origin,
  by = "unique_id"
)
```

```{r}
#some of the variables repeat themselves
#selecting non-repeats
```

```{r}
# Keep one copy of each variable. Columns that appeared in more than one of the
# joined tables carry a ".x" suffix from the joins; the ".x" copy is the one
# retained here. Column order follows the order listed below.
merged_ACS <- Pooled_dat_three %>%
  dplyr::select(dplyr::all_of(c(
    # identifiers, weights and geography
    "multyear.x", "unique_id", "sample.x", "serial.x", "hhwt.x",
    "cluster.x", "puma_2", "statefip", "countyfip", "met2013", "puma",
    "strata.x", "perwt.x",
    # birthplace, immigration and language
    "bpl", "bpld", "yrimmig", "language", "languaged",
    "bpl_mom", "bpl_mom_new", "bpl_pop", "language_mom", "language_pop",
    "ethnicity", "statefip_name", "year", "hcovany", "cbserial", "density",
    # household and person characteristics
    "hhincome", "foodstmp", "sex", "age", "educ", "educd",
    "empstat", "empstatd", "incwage", "ftotinc",
    "citizen", "incwelfr", "hispand"
  )))
```

```{r}
# Export the merged ACS data as an RDS file.
# NOTE(review): this path is Merged_ACS.rds in the Census folder, the same file
# name that is read into dat_one at the top of this document, so running the
# export replaces that input -- confirm this is intended before re-running.
saveRDS(
  object = merged_ACS,
  file = "~/Desktop/Senior Thesis/Census/Merged_ACS.rds"
)
```

```{r}
# Calculating the weighted proportion of each ethnicity within each puma_2.
# Output columns: puma_2, ethnicity, persons (sum of perwt.x), prop (share of
# the puma_2 total) and rank (1 = largest share within the puma_2).
sums <- merged_ACS %>%
  group_by(puma_2, ethnicity) %>%
  # .groups = "drop_last" leaves the result grouped by puma_2 only, which is
  # what the share and rank below need; stating it explicitly (dplyr >= 1.0.0)
  # also avoids the regrouping message in the knit output.
  dplyr::summarize(
    persons = sum(perwt.x, na.rm = TRUE),
    .groups = "drop_last"
  ) %>%
  dplyr::mutate(
    prop = persons / sum(persons, na.rm = TRUE),
    # negate so that the largest proportion receives rank 1
    rank = min_rank(x = -prop)
  ) %>%
  # drop the puma_2 grouping so later steps work on a plain tibble
  dplyr::ungroup() %>%
  dplyr::arrange(puma_2, rank)
```

```{r}
# Keep the (puma_2, ethnicity) rows where the ethnicity makes up at least 30%
# of the puma_2; rows with a missing prop are dropped.
prop_over30 <- dplyr::filter(sums, prop >= 0.30)
```

```{r}
# Count how many rows of prop_over30 fall under each ethnicity, with an
# explicit NA category.
table(prop_over30[["ethnicity"]], useNA = "always")
``` 
